

################################################################
##########               LOAD PACKAGES               ###########
################################################################

# Reset the session: drop every object in the workspace, then run the
# garbage collector.
rm(list = ls())
gc()

# Package library location and working directory used by this script.
.libPaths("U:/R/Packages/4.0_top")
setwd("\\\\mcm-main/research/Zheng_10223/Derived Data")

library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(tidyverse)
library(tidyr)
library(stringr)
# Output directory for derived files. Note: no trailing separator.
filedirec <- "H:/Zheng_10223/Derived Data"

# Complement of %in%: TRUE for each element of `x` that is absent from `table`.
`%notin%` <- function(x, table) {
  !(x %in% table)
}

# Output: one row per unique IMDB_ID, together with that person's FIRST census tract.

# This file gets each person's first census tract from the landing file.


datayears <- c(1982:2019)  # not used by the loop below, which reads 1982-2005 only

# Two-digit census vintage embedded in the geography column names of a given
# tax year's file:
#   < 1986 -> "81", 1986-1990 -> "86", 1991-1995 -> "91", 1996-2000 -> "96",
#   2001-2005 -> "01", 2006-2010 -> "06", 2011 onwards -> "16".
census_vintage <- function(year) {
  vintages <- c("81", "86", "91", "96", "01", "06", "16")
  vintages[findInterval(year, c(1986, 1991, 1996, 2001, 2006, 2011)) + 1L]
}

landing_years <- 1982:2005

# One table per tax year, holding only the people first seen in that year.
# The pieces are bound once after the loop instead of growing `taxout` with
# rbind() on every pass, which re-copied the whole table each year.
yearly_new <- vector("list", length(landing_years))

# IDs already captured in an earlier year. 0 is the original placeholder used
# before any file has been read.
uniqueid <- 0

for (k in seq_along(landing_years)) {
  i <- landing_years[k]
  print(i)

  censyear <- census_vintage(i)

  # Source column name for each standardised name (names = new, values = old).
  # Built once so the read and the rename cannot drift apart.
  src_cols <- c(
    IMDB_ID = "IMDB_ID",
    CMA_F   = paste0("CMA", censyear, "F", i),
    XCMA_F  = paste0("XCMA", censyear, "F", i),
    CT_F    = paste0("CT", censyear, "_F", i),
    XCT_F   = paste0("XCT", censyear, "_F", i),
    PR_F    = paste0("PR___F", i)
  )

  tax <- read_dta(
    paste0(
      "G:/IMDB_AllYears/rdc/IMDB_BDIM_2020_v1/data/stata/core_imdb/imdb_t1ff_",
      i, "_f3_v1.dta"
    ),
    col_select = all_of(unname(src_cols))
  )
  setnames(tax, old = unname(src_cols), new = names(src_cols))

  # Keep only people who did not appear in any earlier year.
  tax <- tax[tax$IMDB_ID %notin% uniqueid, ]

  tax$cens <- censyear
  tax$taxyear <- i  # taxyear = first year we see them

  yearly_new[[k]] <- tax

  # Unique individuals captured so far; later years must not add them again.
  uniqueid <- if (k == 1L) {
    unique(tax$IMDB_ID)
  } else {
    unique(c(uniqueid, tax$IMDB_ID))
  }
}

# Stack all years in a single rbind() call.
taxout <- do.call(rbind, yearly_new)
rm(yearly_new)

# Keep a working copy for the enclave merge and save the stacked landing table.
# filedirec has no trailing separator, so pasting with sep = "" produced
# ".../Derived DataCensus/landing_tract1982_2005.csv"; file.path() inserts the
# missing "/".
dftract <- taxout
fwrite(taxout, file.path(filedirec, "Census", "landing_tract1982_2005.csv"))

####################################################

# Census year assigned to each geography vintage stored in `cens`.
#
# `cens` is text ("81", "86", "91", "96", "01") when it comes straight from the
# loop above, so the previous test `cens == 1` never matched "01" and those
# rows were left with censyear = NA. The vintage is now normalised through
# as.integer() before the lookup, which also works if `cens` is already
# numeric (e.g. presumably after re-reading the saved CSV -- TODO confirm).
#
# The "01" vintage maps to 2001 rather than 2006: the final merge keys on both
# `code` and `censyear`, and `code` embeds 2001 for this vintage, so any other
# value could never match an enclave row.
# NOTE(review): confirm 2001 is the intended census year for CT01 geography.
#
# The 1981 vintage is assigned 1986, as before.
vintage_census_year <- c(
  "81" = 1986,
  "86" = 1986,
  "91" = 1991,
  "96" = 1996,
  "1"  = 2001
)
dftract$censyear <- unname(
  vintage_census_year[as.character(as.integer(dftract$cens))]
)

# only want people who have census years in 1986:2001

# get tract number before the decimal
dftract$firsttractpart <- as.integer(dftract$CT_F)
########### Import enclave computation from census
enclaves <- fread("H:/Zheng_10223/Joint/Census/tract_vismin_all.csv")

# Zero-pad the tract identifier to 7 characters and store the year as text,
# then build the join key `code` as year followed by the padded tract.
enclaves[
  ,
  c("uniquetract", "year") := .(
    str_pad(as.character(uniquetract), width = 7L, pad = "0"),
    as.character(year)
  )
][
  ,
  code := paste0(year, uniquetract)
]




# dftract is assembled from read_dta() results bound with rbind(), so it is not
# a data.table at this point; `:=` needs one. setDT() converts in place and is
# a no-op if dftract already is a data.table.
setDT(dftract)

# Tract key: 3-character zero-padded CMA followed by the 4-character
# zero-padded integer part of the census tract.
dftract[, CT_F := as.integer(CT_F)]
dftract[, cma := str_pad(as.character(CMA_F), width = 3L, pad = "0")]
dftract[, ct  := str_pad(as.character(CT_F), width = 4L, pad = "0")]

dftract[, uniquetract := paste0(cma, ct)]

# Row counts per vintage (printed when run interactively).
dftract[, .N, .(cens)][order(cens)]

# Drop the 1981 vintage. `cens` may be text ("81") or numeric (81), so compare
# on the integer value.
dftract <- dftract[as.integer(cens) != 81L]

# Four-digit year from the two-digit vintage. `cens + 1900` failed on the text
# values written by the loop, hence the as.integer(). "01" becomes 1901 and is
# then corrected to 2001.
dftract[, year := as.character(as.integer(cens) + 1900L)]
dftract[year == "1901", year := "2001"]
dftract[, .N, .(year)][order(year)]

# Join key in the same format as enclaves$code: year followed by tract key.
dftract[, code := paste0(year, uniquetract)]

# Diagnostic join: every dftract row, with the enclave columns attached where
# `code` matches.
new <- enclaves[dftract, on = "code"]

# Row counts per year (printed when run interactively).
new[, .(N = .N), by = .(year)][order(year)]

# Flag rows with no black-ratio value and tabulate the flag.
set(new, j = "missing", value = is.na(new[["ratio_black"]]))
new[, .(N = .N), by = .(missing)]
### This tabulation gives us the matching rate


# Clean enclaves: keep only the ratio columns plus the join key and year.
enclaves_clean <- enclaves[, c(
  "ratio_black", "ratio_sa", "ratio_ea", "ratio_sea", "ratio_pi", "ratio_wa",
  "ratio_la", "ratio_multi", "ratio_white", "ratio_aborig", "code", "year"
)]

# Merge on tract code and census year. Both sides carry censyear as numeric;
# all.x = TRUE keeps every person even when no enclave row matches.
dftract$censyear <- as.numeric(dftract$censyear)
enclaves_clean$censyear <- as.numeric(enclaves_clean$year)
dftractmerge <- merge(
  dftract[, c("IMDB_ID", "censyear", "code")],
  enclaves_clean,
  by = c("code", "censyear"),
  all.x = TRUE
)

# filedirec has no trailing separator, so pasting with sep = "" wrote to
# ".../Derived Datadftractenclave.csv" (one directory up, wrong file name).
# file.path() inserts the missing "/".
# NOTE(review): anything that reads the old path needs to be pointed here.
fwrite(dftractmerge, file.path(filedirec, "dftractenclave.csv"))

